library(GEOquery)
library(crayon)
library(tidyverse)
library(Seurat)
library(SeuratData)
library(Azimuth)
library(patchwork)
library(Matrix)
library(RCurl)
library(DoubletFinder)
library(scales)
library(SoupX)
library(cowplot)
library(metap)
library(SingleCellExperiment)
library(DropletUtils)
library(AnnotationHub)
library(HGNChelper)
library(ensembldb)
library(multtest)
library(glmGamPoi)
library(pbapply)
library(data.table)
library(ggrepel)
library(ggpubr)
library(stringr)
library(canvasXpress)
library(clinfun)
library(GGally)
library(gridExtra)
library(factoextra)
library(DESeq2)
library(EnhancedVolcano)
library(ComplexHeatmap)
library(limma)
library(fgsea)
library(org.Mm.eg.db)
library(kableExtra)

Experimental design

GEO Accession: GSE264278

Publication Title: SFTPB in serum extracellular vesicles as a biomarker of progressive pulmonary fibrosis

Authors: Enomoto T et al. 

Publication: N/A


Rationale: Pulmonary fibrosis (PF) is a progressive fibrotic disease with a poor prognosis and suboptimal therapeutic options. To construct comprehensive single-cell atlas and identify disease-relevant cell subsets and gene signatures of the murine models of PF pathogenesis and lung repair, time-course singel-cell RNA-seq analysis of bleomycin (BLM) and LPS-induced lung injury/fibrosis models were performed. The authors sampled mouse lungs of untreated, day3, 7, 14, 28, 42, 63 (BLM model, 1.25 mg/kg), day3, 7, 14 (3 mg/kg), and day7, 14, 42 (LPS model, 1mg/ml, 50ul/head) with 5 biological replicates (except BLM 3mg/kg at day 14 which includes 2 biological replicate) and processed by using TAS-Seq protocol (Shichino et al. Commnue Biol 2022). The authors found that some of the early phase-specific markers, and repair phase-specific cell subsets and associated gene signatures. Of these, the authors found that Sftpb was an early marker of lung injury that was specifically expressed on AT2 cells, and SFTPC protein also highly expressed in the serum extracellular vesicles from prognosis-worsen PF-ILD patients. Our dataset might useful to clarify lung injury and repair mechanisms.

Methodology: The authors sampled mouse lungs of untreated, day3, 7, 14, 28, 42, 63 (BLM model, 1.25 mg/kg), day3, 7, 14 (3 mg/kg), and day7, 14, 42 (LPS model) with 5 biological replicates (except BLM 3mg/kg at day 14 which includes 2 biological replicate) and processed by using TAS-Seq protocol (Shichino et al. Commnue Biol 2022).

Load pre-processed Seurat object

# Specify project file path. 
project_path = "~/Documents/Work/UnityHealth/Data_Projects/scRNAseq_LungFibrosisAnalysis/GSE264278/"

# Load pre-processed Seurat objects.
load(file = paste0(project_path, "data/preprocessed_seurat_obj.RData"))

# Set colour palette for cell type.
celltype_col = c("AT1" = "#2f4f4f",
                 "AT2" = "#00bfff",
                 "Basal resting" = "#2e8b57",
                 "Club (non-nasal)" = "#8b008b",
                 "Deuterosomal" = "#00ff00",
                 "Endothelial cells" = "#808000",
                 "Endothelial lympathic" = "#00008b",
                 "Fibroblasts" = "#ff4500",
                 "Fibromyocytes" = "#ffa500",
                 "Immune" = "#f08080",
                 "Ionocyte" = "#f0e68c",
                 "Multiciliated (non-nasal)" = "#ff00ff",
                 "Myofibroblasts" = "#c71585",
                 "Neuroendocrine" = "#00fa9a",
                 "Pericytes" = "#ff1493",
                 "Suprabasal" = "#eee8aa",
                 "Transitional Club-AT2" = "#9370db",
                 "VSMCs" = "#00ffff")

# Set new name for Seurat object to be more informative.
seurat_obj = seurat_cluster

# Collapse different types of endothelial cells into generic endothelial cells.
grep("Endothelial", seurat_obj$cell_type, value = TRUE) %>% unique()
## [1] "Endothelial venous"    "Endothelial capillary" "Endothelial arterial" 
## [4] "Endothelial lymphatic"
seurat_obj$cell_type <- gsub("Endothelial venous|Endothelial capillary|Endothelial arterial", 
                             "Endothelial cells",
                             seurat_obj$cell_type)

# Collapse fibroblasts and myofibroblasts into fibroblasts.
grep("ibroblast", seurat_obj$cell_type, value = TRUE) %>% unique()
## [1] "Fibroblasts"    "Myofibroblasts"
seurat_obj$cell_type <- gsub("Myofibroblasts", 
                             "Fibroblasts",
                             seurat_obj$cell_type)

# Set max overlaps to infinity globally.
options(ggrepel.max.overlaps = Inf)

# Factorize timepoint to arrange it in chronological order.
seurat_obj$timepoint <- factor(seurat_obj$timepoint, levels = c("Untreated",
                                                                "Day 3",
                                                                "Day 7",
                                                                "Day 14",
                                                                "Day 28",
                                                                "Day 42",
                                                                "Day 63"))

# Set Seurat object idents.
Idents(seurat_obj) <- "cell_type"

# Remove unneeded objects.
rm(seurat_cluster)

Build summary table to determine metrics

# Get the variables of interest.
summary_table <- (FetchData(seurat_obj,
                            vars = c("cell_type",
                                     "orig.ident",
                                     "replicate",
                                     "treatment",
                                     "treatment_dosage", 
                                     "timepoint",
                                     "Antxr1",
                                     "Krt8"))) 
# Each timepoint contains unsorted lung tissues from 5 mice.

# Define ANTXR1 expression based on 0 expression threshold.
summary_table <- summary_table %>% mutate(Antxr1_expressed = Antxr1 > 0)

# Define KRT8 expression based on 0 expression threshold.
summary_table <- summary_table %>% mutate(Krt8_expressed = Krt8 > 0)

# Calculate total cells per timepoint.
total_cells_timepoint_mice <- (summary_table %>%
                                 group_by(orig.ident, timepoint) %>%
                                 summarise(total_cells_timepoint = n(),
                                           .groups = "drop"))

# Summarise cell counts per mice per timepoint for each cell type. 
celltype_timepoint_counts_mice <- (summary_table %>% 
                                     group_by(orig.ident, cell_type, timepoint) %>%
                                     summarise(celltype_count = n(),
                                               Antxr1_total_exprs = sum(Antxr1_expressed),
                                               Krt8_total_exprs = sum(Krt8_expressed),
                                               .groups = "drop") %>%
                                     left_join(total_cells_timepoint_mice, 
                                               by = c("orig.ident", "timepoint")))

# Calculate percentages.
celltype_timepoint_counts_mice <- (celltype_timepoint_counts_mice %>%
                                     mutate(celltype_by_totalcells = 
                                              (celltype_count/total_cells_timepoint)*100,
                                            Antxr1_by_totalcells =
                                              (Antxr1_total_exprs/total_cells_timepoint)*100,
                                            Antxr1_by_celltypes =
                                              (Antxr1_total_exprs/celltype_count)*100,
                                            Krt8_by_totalcells =
                                              (Krt8_total_exprs/total_cells_timepoint)*100,
                                            Krt8_by_celltypes =
                                              (Krt8_total_exprs/celltype_count)*100))

# Calculate summary statistics for the calculated percentages by mice.
summary_stats <- (celltype_timepoint_counts_mice %>%
                    group_by(cell_type, timepoint) %>%
                    summarise(
                      mean_percent_celltype = mean(celltype_by_totalcells),
                      sd_percent_celltype = sd(celltype_by_totalcells),
                      mean_percent_antxr1_total = mean(Antxr1_by_totalcells),
                      sd_percent_antxr1_total = sd(Antxr1_by_totalcells),
                      mean_percent_antxr1_celltype = mean(Antxr1_by_celltypes),
                      sd_percent_antxr1_celltype = sd(Antxr1_by_celltypes),
                      mean_percent_krt8_total = mean(Krt8_by_totalcells),
                      sd_percent_krt8_total = sd(Krt8_by_totalcells),
                      mean_percent_krt8_celltype = mean(Krt8_by_celltypes),
                      sd_percent_krt8_celltype = sd(Krt8_by_celltypes),
                      n_mice = n(),
                      se_percent_celltype = sd_percent_celltype / sqrt(n_mice),
                      se_percent_antxr1_total = sd_percent_antxr1_total / sqrt(n_mice),
                      se_percent_antxr1_celltype = sd_percent_antxr1_celltype / sqrt(n_mice),
                      se_percent_krt8_total = sd_percent_krt8_total / sqrt(n_mice),
                      se_percent_krt8_celltype = sd_percent_krt8_celltype / sqrt(n_mice),
                      .groups = "drop"
                    ))

# Remove unneeded objects.
rm(total_cells_timepoint_mice)

1: Global cell type proportion

1.1: All cells

# Visualize the global cell proportion as a UMAP. 
p1 <- DimPlot(seurat_obj,
              reduction = "umap",
              group.by = "cell_type",
              split.by = "timepoint",
              cols = celltype_col,
              label = TRUE,
              label.size = 3,
              label.color = "#02066f",
              repel = TRUE,
              order = TRUE,
              ncol = 2,
              raster = FALSE) + 
  theme_linedraw() +
  theme(axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 8),
        strip.text = element_text(size = 15)) +
  NoLegend()

# Visualize cell proportion counts as barplot.
p2 <- celltype_timepoint_counts_mice %>%
  group_by(cell_type, timepoint) %>%
  summarise(n = n(),
            mean = plyr::round_any(mean(celltype_by_totalcells), 
                                   accuracy = 0.01, 
                                   f = ceiling),
            sd = plyr::round_any(sd(celltype_by_totalcells), 
                                 accuracy = 0.01, 
                                 f = ceiling),
            .groups = "drop") %>%
  mutate(se = plyr::round_any(sd/sqrt(n), accuracy = 0.01, f = ceiling)) %>%
  ggplot(aes(x = cell_type,
             y = mean,
             fill = cell_type)) +
  geom_col(show.legend = FALSE) +
  geom_errorbar(aes(ymin = mean-se,
                    ymax = mean+se),
                width = 0.4,
                alpha = 0.9) +
  geom_text(aes(label = mean,
                vjust = 0.5, 
                hjust = -0.8),
            size = 5,
            angle = 90) +
  facet_wrap(~timepoint, ncol = 2) +
  labs(x = "Cell type", 
       y = "% mean cell type for each timepoint
       (Blank = > 0.001 or 0.1%)", 
       fill = element_blank()) +
  ylim(0, 100) +
  scale_fill_manual(values = celltype_col) +
  theme_linedraw() +
  theme(axis.text.x = element_text(size = 12,
                                   angle = 90,
                                   hjust = 1,
                                   vjust = 0.5),
        strip.text = element_text(size = 15))

# Visualize both plots together.
grid.arrange(p1, p2, ncol = 1)

1.2: AT2, Fibroblasts, Endothelial cells and VSMCs only

# Define list of cell types for each subgroups.
AT2 = WhichCells(seurat_obj, idents = c("AT2"))
fibroblasts = WhichCells(seurat_obj, idents = c("Fibroblasts"))
endothelial = WhichCells(seurat_obj, idents = c("Endothelial cells"))
VSMCs = WhichCells(seurat_obj, idents = c("VSMCs"))

# Visualize the global cell proportion as a UMAP. 
p1 <- DimPlot(seurat_obj,
              reduction = "umap",
              group.by = "cell_type",
              split.by = "timepoint",
              cells.highlight = list(AT2,
                                     fibroblasts,
                                     endothelial,
                                     VSMCs),
              cols.highlight = c("#00ffff", "#808000", "#ff4500", "#00bfff"),
              label = TRUE,
              label.size = 3,
              label.color = "#02066f",
              repel = TRUE,
              order = TRUE,
              ncol = 2,
              raster = FALSE) + 
  theme_linedraw() +
  theme(axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 8),
        strip.text = element_text(size = 15)) +
  NoLegend()

# Visualize cell proportion counts as barplot.
p2 <- celltype_timepoint_counts_mice %>%
  dplyr::filter(cell_type %in% c("AT2", 
                                 "Fibroblasts", 
                                 "Endothelial cells",
                                 "VSMCs")) %>%
  group_by(cell_type, timepoint) %>%
  summarise(n = n(),
            mean = plyr::round_any(mean(celltype_by_totalcells), 
                                   accuracy = 0.01, 
                                   f = ceiling),
            sd = plyr::round_any(sd(celltype_by_totalcells), 
                                 accuracy = 0.01, 
                                 f = ceiling),
            .groups = "drop") %>%
  mutate(se = plyr::round_any(sd/sqrt(n), accuracy = 0.01, f = ceiling)) %>%
  ggplot(aes(x = cell_type,
             y = mean,
             fill = cell_type)) +
  geom_col(show.legend = FALSE) +
  geom_errorbar(aes(ymin = mean-se,
                    ymax = mean+se),
                width = 0.4,
                alpha = 0.9) +
  geom_text(aes(label = mean,
                vjust = 0.5, 
                hjust = -0.8),
            size = 5,
            angle = 90) +
  facet_wrap(~timepoint, ncol = 2) +
  labs(x = "Cell type", 
       y = "% mean cell type for each timepoint
       (Blank = > 0.001 or 0.1%)", 
       fill = element_blank()) +
  ylim(0, 35) +
  scale_fill_manual(values = celltype_col) +
  theme_linedraw() +
  theme(axis.text.x = element_text(size = 12,
                                   angle = 45,
                                   hjust = 1,
                                   vjust = 1),
        strip.text = element_text(size = 15))

# Visualize both plots together.
grid.arrange(p1, p2, ncol = 1)

# Visualize cell proportion counts as line graph.
celltype_timepoint_counts_mice %>%
  dplyr::filter(cell_type %in% c("AT2", 
                                 "Fibroblasts", 
                                 "Endothelial cells",
                                 "VSMCs")) %>%
  group_by(cell_type, timepoint) %>%
  summarise(n = n(),
            mean = plyr::round_any(mean(celltype_by_totalcells), 
                                   accuracy = 0.01, 
                                   f = ceiling),
            sd = plyr::round_any(sd(celltype_by_totalcells), 
                                 accuracy = 0.01, 
                                 f = ceiling),
            .groups = "drop") %>%
  mutate(se = plyr::round_any(sd/sqrt(n), accuracy = 0.01, f = ceiling)) %>%
  ggplot(aes(x = timepoint,
             y = mean,
             group = cell_type,
             colour = cell_type)) +
  geom_line(size = 0.7) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = mean-se,
                    ymax = mean+se),
                width = 0.4,
                alpha = 0.9) +
  geom_text(aes(label = mean,
                vjust = 0.5, 
                hjust = -0.8),
            size = 5,
            angle = 0,
            repel = TRUE) +
  labs(x = "Timepoint", 
       y = "% mean cell type for each timepoint
       (Blank = > 0.001 or 0.1%)", 
       fill = element_blank()) +
  ylim(0, 30) +
  scale_colour_manual(values = celltype_col) +
  theme_linedraw() +
  theme(axis.text.x = element_text(size = 12,
                                   angle = 0,
                                   hjust = 0.5,
                                   vjust = 1))

2: Expression comparisons between treatments

2.1: Expression comparisons between bleomycin and control for all cell types

Acta2: a-SMA marker Col1a1 & Col1a2: Collagen type I marker Col3a1: Collagen type III marker Cthrc1: Prognostic biomarker for fibrosis

# Visualize the expressions of all cell type clusters.
VlnPlot(seurat_obj, 
        features = c("Acta2", "Col1a2", "Col3a1", "Col1a1", "Antxr1"), 
        group.by = "cell_type", 
        split.by = "treatment",
        split.plot = FALSE,
        stack = TRUE,
        flip = TRUE,
        same.y.lims = TRUE,
        ncol = 1,
        raster = FALSE) +
  theme(legend.position = "top") +
  stat_summary(fun.y = median, 
               geom = "point", 
               size = 1, 
               color = "#000001", 
               position = position_dodge(0.9)) 
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

2.2: Expression comparisons between timepoints for all cell types

Acta2: a-SMA marker Col1a1 & Col1a2: Collagen type I marker Col3a1: Collagen type III marker Mmp2: Matrix degrading enzyme Cthrc1: Prognostic biomarker for fibrosis

# Subset non-zero level expressing cell types
vln_seu <- subset(seurat_obj, 
                  idents = c("AT1", "AT2", "Endothelial cells", 
                             "Endothelial lymphatic",
                             "Fibroblasts", "Fibromyocytes"), 
                  invert = FALSE)

# Get all the collagen genes.
grep("^Col{1}[0-9]+", rownames(vln_seu), value = TRUE) %>% unique() %>% sort()
##  [1] "Col10a1"  "Col11a1"  "Col11a2"  "Col12a1"  "Col13a1"  "Col14a1" 
##  [7] "Col15a1"  "Col16a1"  "Col17a1"  "Col18a1"  "Col19a1"  "Col1a1"  
## [13] "Col1a2"   "Col20a1"  "Col22a1"  "Col23a1"  "Col24a1"  "Col25a1" 
## [19] "Col26a1"  "Col27a1"  "Col28a1"  "Col2a1"   "Col3a1"   "Col4a1"  
## [25] "Col4a2"   "Col4a3"   "Col4a3bp" "Col4a4"   "Col4a5"   "Col4a6"  
## [31] "Col5a1"   "Col5a2"   "Col5a3"   "Col6a1"   "Col6a2"   "Col6a3"  
## [37] "Col6a4"   "Col6a5"   "Col6a6"   "Col7a1"   "Col8a1"   "Col8a2"  
## [43] "Col9a1"   "Col9a2"   "Col9a3"
# Create a gene list of non-zero collagen genes.
col_genes = c("Col1a1", "Col1a2", "Col3a1", "Col4a1", "Col4a2", 
              "Col4a5", "Col5a1", "Col5a2", "Col6a1", "Col6a2", 
              "Col6a3", "Col13a1", "Col14a1", "Col16a1", "Col18a1", 
              "Col23a1")

# Visualize the expressions of all cell type clusters.
VlnPlot(vln_seu, 
        features = c("Acta2", col_genes, "Antxr1"), 
        group.by = "cell_type", 
        split.by = "timepoint",
        split.plot = FALSE,
        stack = TRUE,
        flip = TRUE,
        same.y.lims = TRUE,
        ncol = 1) +
  theme(legend.position = "top",
        axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  stat_summary(fun.y = median, 
               geom = "point", 
               size = 1, 
               color = "#000001", 
               position = position_dodge(0.9))

2.3: Expression of fibrosis signalling kinetics

# Subset non-zero level expressing cell types
vln_seu <- subset(seurat_obj, 
                  idents = c("AT1", "AT2", "Endothelial cells", 
                             "Endothelial lymphatic",
                             "Fibroblasts", "Fibromyocytes"), invert = FALSE)

# Get all the TGFB & PDGF genes.
grep("^Tgfb+", rownames(vln_seu), value = TRUE) %>% unique() %>% sort()
##  [1] "Tgfb1"    "Tgfb1i1"  "Tgfb2"    "Tgfb3"    "Tgfbi"    "Tgfbr1"  
##  [7] "Tgfbr2"   "Tgfbr3"   "Tgfbr3l"  "Tgfbrap1"
grep("^Pdgf+", rownames(vln_seu), value = TRUE) %>% unique() %>% sort()
## [1] "Pdgfa"  "Pdgfb"  "Pdgfc"  "Pdgfd"  "Pdgfra" "Pdgfrb" "Pdgfrl"
fibrosis_genes = c("Tgfb1", "Tgfb2", "Tgfb3", "Pdgfa", "Pdgfb", "Ctgf")

# Visualize the level of expressions of fibrosis genes in subsetted cell types.
VlnPlot(vln_seu, 
        features = fibrosis_genes, 
        group.by = "cell_type", 
        split.by = "timepoint",
        split.plot = FALSE,
        stack = TRUE,
        flip = TRUE,
        same.y.lims = TRUE,
        ncol = 1) +
  theme(legend.position = "top",
        axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  stat_summary(fun.y = median, 
               geom = "point", 
               size = 1, 
               color = "#000001", 
               position = position_dodge(0.9))

# Visualize the degree of expression of fibrosis genes in subsetted cell types.
fibrosis_genes = c("Ctgf", "Pdgfb", "Pdgfa", "Tgfb3", "Tgfb2", "Tgfb1")

DotPlot(vln_seu,
        features = fibrosis_genes) +
  scale_color_gradient2(low = "#c85200", mid = "#cccccc", high = "#366785") +
  coord_flip()

3: ANTXR1 expression

3.1: ANTXR1 expression by global total cells

Acta2: a-SMA marker Col1a1 & Col1a2: Collagen type I marker Col3a1: Collagen type III marker Cthrc1: Prognostic biomarker for fibrosis

# Visualize the ANTXR1+ cell type clusters.
FeaturePlot(seurat_obj, 
            reduction = "umap", 
            features = "Antxr1",
            split.by = "timepoint",
            pt.size = 0.8,
            label = TRUE,
            label.size = 3,
            repel = TRUE,
            order = TRUE,
            ncol = 2,
            cols = c("#d3d3d3", "#edc9af", "#6e260e")) +
  patchwork::plot_layout(ncol = 3, nrow = 3) +
  theme(axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 8),
        strip.text = element_text(size = 15)) 


# Visualize ANTXR1 expressed by total cells as barplot.
celltype_timepoint_counts_mice %>%
  group_by(cell_type, timepoint) %>%
  summarise(n = n(),
            mean = plyr::round_any(mean(Antxr1_by_totalcells),
                                   accuracy = 0.01,
                                   f = ceiling),
            sd = plyr::round_any(sd(Antxr1_by_totalcells),
                                 accuracy = 0.01,
                                 f = ceiling),
            .groups = "drop") %>%
  mutate(se = plyr::round_any(sd/sqrt(n),
                              accuracy = 0.01,
                              f = ceiling)) %>%
  ggplot(aes(x = cell_type,
             y = mean,
             fill = cell_type)) +
  geom_col(show.legend = FALSE) +
  geom_errorbar(aes(ymin = mean-se,
                    ymax = mean+se),
                width = 0.4,
                alpha = 0.9) +
  geom_text(aes(label = mean,
                vjust = 0.5, 
                hjust = -0.8),
            size = 5,
            angle = 90) +
  facet_wrap(~timepoint) +
  labs(x = "Cell type", 
       y = "% ANTXR1+ expressed per total cells for each timepoint
       (Blank = > 0.001 or 0.1%)", 
       fill = element_blank()) +
  ylim(0, 20) +
  scale_fill_manual(values = celltype_col) +
  theme_linedraw() +
  theme(axis.text.x = element_text(size = 12,
                                   angle = 90,
                                   hjust = 1,
                                   vjust = 0.5),
        strip.text = element_text(size = 15))


# Visualize ANTXR1 expressed by total cells as barplot in selected cell types.
celltype_timepoint_counts_mice %>%
  dplyr::filter(cell_type %in% c("AT2", 
                                 "Fibroblasts", 
                                 "Endothelial cells",
                                 "VSMCs")) %>%
  group_by(cell_type, timepoint) %>%
  summarise(n = n(),
            mean = plyr::round_any(mean(Antxr1_by_totalcells),
                                   accuracy = 0.01,
                                   f = ceiling),
            sd = plyr::round_any(sd(Antxr1_by_totalcells),
                                 accuracy = 0.01,
                                 f = ceiling),
            .groups = "drop") %>%
  mutate(se = plyr::round_any(sd/sqrt(n),
                              accuracy = 0.01,
                              f = ceiling)) %>%
  ggplot(aes(x = cell_type,
             y = mean,
             fill = cell_type)) +
  geom_col(show.legend = FALSE) +
  geom_errorbar(aes(ymin = mean-se,
                    ymax = mean+se),
                width = 0.4,
                alpha = 0.9) +
  geom_text(aes(label = mean,
                vjust = 0.5, 
                hjust = -0.8),
            size = 5,
            angle = 90) +
  facet_wrap(~timepoint) +
  labs(x = "Cell type", 
       y = "% ANTXR1+ expressed per total cells for each timepoint
       (Blank = > 0.001 or 0.1%)", 
       fill = element_blank()) +
  ylim(0, 20) +
  scale_fill_manual(values = celltype_col) +
  theme_linedraw() +
  theme(axis.text.x = element_text(size = 12,
                                   angle = 45,
                                   hjust = 1,
                                   vjust = 1),
        strip.text = element_text(size = 15))

# Visualize ANTXR1 expressed by total cells as line graph in selected cell types.
celltype_timepoint_counts_mice %>%
  dplyr::filter(cell_type %in% c("AT2", 
                                 "Fibroblasts", 
                                 "Endothelial cells",
                                 "VSMCs")) %>%
  group_by(cell_type, timepoint) %>%
  summarise(n = n(),
            mean = plyr::round_any(mean(Antxr1_by_totalcells),
                                   accuracy = 0.01,
                                   f = ceiling),
            sd = plyr::round_any(sd(Antxr1_by_totalcells),
                                 accuracy = 0.01,
                                 f = ceiling),
            .groups = "drop") %>%
  mutate(se = plyr::round_any(sd/sqrt(n),
                              accuracy = 0.01,
                              f = ceiling)) %>%
  ggplot(aes(x = timepoint,
             y = mean,
             group = cell_type,
             colour = cell_type)) +
  geom_line(size = 0.7) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = mean-se,
                    ymax = mean+se),
                width = 0.4,
                alpha = 0.9) +
  geom_text(aes(label = mean,
                vjust = 0.5, 
                hjust = -0.8),
            size = 5,
            angle = 0,
            repel = TRUE) +
  labs(x = "Timepoint", 
       y = "% ANTXR1+ expressed per total cells for each timepoint
       (Blank = > 0.001 or 0.1%)", 
       fill = element_blank()) +
  ylim(0, 20) +
  scale_colour_manual(values = celltype_col) +
  theme_linedraw() +
  theme(axis.text.x = element_text(size = 12,
                                   angle = 0,
                                   hjust = 0.5,
                                   vjust = 1))

3.2: ANTXR1 expression by total cells per cell type

Acta2: a-SMA marker Col1a1 & Col1a2: Collagen type I marker Col3a1: Collagen type III marker Cthrc1: Prognostic biomarker for fibrosis

# Visualize the ANTXR1+ cell type clusters.
FeaturePlot(seurat_obj, 
            reduction = "umap", 
            features = "Antxr1",
            split.by = "timepoint",
            pt.size = 0.8,
            label = TRUE,
            label.size = 3,
            repel = TRUE,
            order = TRUE,
            cols = c("#d3d3d3", "#edc9af", "#6e260e")) +
  patchwork::plot_layout(ncol = 3, nrow = 3) +
  theme(axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 8),
        strip.text = element_text(size = 15)) 


# Visualize ANTXR1 expressed by total cells per cell type as barplot.
celltype_timepoint_counts_mice %>%
  group_by(cell_type, timepoint) %>%
  summarise(n = n(),
            mean = plyr::round_any(mean(Antxr1_by_celltypes),
                                   accuracy = 0.01,
                                   f = ceiling),
            sd = plyr::round_any(sd(Antxr1_by_celltypes),
                                 accuracy = 0.01,
                                 f = ceiling),
            .groups = "drop") %>%
  mutate(se = plyr::round_any(sd/sqrt(n),
                              accuracy = 0.01,
                              f = ceiling)) %>%
  ggplot(aes(x = cell_type,
             y = mean,
             fill = cell_type)) +
  geom_col(show.legend = FALSE) +
  geom_errorbar(aes(ymin = mean-se,
                    ymax = mean+se),
                width = 0.4,
                alpha = 0.9) +
  geom_text(aes(label = mean,
                vjust = 0.5, 
                hjust = -0.8),
            size = 5,
            angle = 90) +
  facet_wrap(~timepoint) +
  labs(x = "Cell type", 
       y = "% mean ANTXR1+ expressed per total cells 
       for each cell type at each timepoint
       (Blank = > 0.001 or 0.1%)", 
       fill = element_blank()) +
  ylim(0, 110) +
  scale_fill_manual(values = celltype_col) +
  theme_linedraw() +
  theme(axis.text.x = element_text(size = 12,
                                   angle = 90,
                                   hjust = 1,
                                   vjust = 0.5),
        strip.text = element_text(size = 15))


# Visualize ANTXR1 expressed by total cells per cell type as barplot in selected cell types.
celltype_timepoint_counts_mice %>%
  dplyr::filter(cell_type %in% c("AT2", 
                                 "Fibroblasts", 
                                 "Endothelial cells",
                                 "VSMCs")) %>%
  group_by(cell_type, timepoint) %>%
  summarise(n = n(),
            mean = plyr::round_any(mean(Antxr1_by_celltypes),
                                   accuracy = 0.01,
                                   f = ceiling),
            sd = plyr::round_any(sd(Antxr1_by_celltypes),
                                 accuracy = 0.01,
                                 f = ceiling),
            .groups = "drop") %>%
  mutate(se = plyr::round_any(sd/sqrt(n),
                              accuracy = 0.01,
                              f = ceiling)) %>%
  ggplot(aes(x = cell_type,
             y = mean,
             fill = cell_type)) +
  geom_col(show.legend = FALSE) +
  geom_errorbar(aes(ymin = mean-se,
                    ymax = mean+se),
                width = 0.4,
                alpha = 0.9) +
  geom_text(aes(label = mean,
                vjust = 0.5, 
                hjust = -0.8),
            size = 5,
            angle = 90) +
  facet_wrap(~timepoint) +
  labs(x = "Cell type", 
       y = "% mean ANTXR1+ expressed per total cells 
       for each cell type at each timepoint
       (Blank = > 0.001 or 0.1%)", 
       fill = element_blank()) +
  ylim(0, 110) +
  scale_fill_manual(values = celltype_col) +
  theme_linedraw() +
  theme(axis.text.x = element_text(size = 12,
                                   angle = 45,
                                   hjust = 1,
                                   vjust = 1),
        strip.text = element_text(size = 15))

# Visualize ANTXR1 expressed by total cells per cell type as line graph in selected cell types.
celltype_timepoint_counts_mice %>%
  dplyr::filter(cell_type %in% c("AT2", 
                                 "Fibroblasts", 
                                 "Endothelial cells",
                                 "VSMCs")) %>%
  group_by(cell_type, timepoint) %>%
  summarise(n = n(),
            mean = plyr::round_any(mean(Antxr1_by_celltypes),
                                   accuracy = 0.01,
                                   f = ceiling),
            sd = plyr::round_any(sd(Antxr1_by_celltypes),
                                 accuracy = 0.01,
                                 f = ceiling),
            .groups = "drop") %>%
  mutate(se = plyr::round_any(sd/sqrt(n),
                              accuracy = 0.01,
                              f = ceiling)) %>%
  ggplot(aes(x = timepoint,
             y = mean,
             group = cell_type,
             colour = cell_type)) +
  geom_line(size = 0.7) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = mean-se,
                    ymax = mean+se),
                width = 0.4,
                alpha = 0.9) +
  geom_text(aes(label = mean,
                vjust = 0.5, 
                hjust = -0.8),
            size = 5,
            angle = 0,
            repel = TRUE) +
  labs(x = "Timepoint", 
       y = "% mean ANTXR1+ expressed per total cells 
       for each cell type at each timepoint
       (Blank = > 0.001 or 0.1%)", 
       fill = element_blank()) +
  ylim(0, 110) +
  scale_colour_manual(values = celltype_col) +
  theme_linedraw() +
  theme(axis.text.x = element_text(size = 12,
                                   angle = 0,
                                   hjust = 0.5,
                                   vjust = 1))

4: ANTXR1 expression comparison - AT2

# Subset cell type of interest - AT2 cells.
AT2_cells <- subset(seurat_obj, idents = "AT2", invert = FALSE)
AT2_cells$treatment <- factor(AT2_cells$treatment, 
                              levels = c("untreated", 
                                         "bleomycin administered intratracheally"))

# Make expression comparisons between metadata of interest.
source("~/Documents/Work/UnityHealth/Data_Projects/VlnPlot_stat.R")
VlnPlot_stat(object = AT2_cells,
             gene_signature = c("Antxr1"),
             test_sign = list(c("Untreated", "Day 3"), 
                              c("Untreated", "Day 7"),
                              c("Untreated", "Day 14"),
                              c("Untreated", "Day 28"),
                              c("Untreated", "Day 42"),
                              c("Untreated", "Day 63")),
             group_name = "timepoint",
             title = "ANTXR1 pairwise comparisons between control and BLM-treated AT2 cells 
             at different timepoints",
             x_angle = 0,
             hjust = 0.5,
             vjust = 1) 

# Remove unneeded objects.
rm(AT2_cells)

5: ANTXR1 expression comparison - Fibroblasts

# Subset cell type of interest - Fibroblasts.
Fibro_cells <- subset(seurat_obj, 
                      idents = c("Fibroblasts"), 
                      invert = FALSE)
Fibro_cells$treatment <- factor(Fibro_cells$treatment, 
                                levels = c("untreated", 
                                           "bleomycin administered intratracheally"))

# Make expression comparisons between metadata of interest.
source("~/Documents/Work/UnityHealth/Data_Projects/VlnPlot_stat.R")
VlnPlot_stat(object = Fibro_cells,
             gene_signature = c("Antxr1"),
             test_sign = list(c("Untreated", "Day 3"), 
                              c("Untreated", "Day 7"),
                              c("Untreated", "Day 14"),
                              c("Untreated", "Day 28"),
                              c("Untreated", "Day 42"),
                              c("Untreated", "Day 63")),
             group_name = "timepoint",
             title = "ANTXR1 pairwise comparisons between control and BLM-treated fibroblasts 
             at different timepoints",
             x_angle = 0,
             hjust = 0.5,
             vjust = 1) 

# Remove unneeded objects.
rm(Fibro_cells)

6: ANTXR1 expression comparison - Endothelial cells

# Subset cell type of interest - Endothelial cells.
EC_cells <- subset(seurat_obj, 
                   idents = c("Endothelial cells"), 
                   invert = FALSE)
EC_cells$treatment <- factor(EC_cells$treatment, 
                             levels = c("untreated", 
                                        "bleomycin administered intratracheally"))

# Make expression comparisons between metadata of interest.
source("~/Documents/Work/UnityHealth/Data_Projects/VlnPlot_stat.R")
VlnPlot_stat(object = EC_cells,
             gene_signature = c("Antxr1"),
             test_sign = list(c("Untreated", "Day 3"), 
                              c("Untreated", "Day 7"),
                              c("Untreated", "Day 14"),
                              c("Untreated", "Day 28"),
                              c("Untreated", "Day 42"),
                              c("Untreated", "Day 63")),
             group_name = "timepoint",
             title = "ANTXR1 pairwise comparisons between control and BLM-treated 
             endothelial cells at different timepoints",
             x_angle = 0,
             hjust = 0.5,
             vjust = 1) 

# Remove unneeded objects.
rm(EC_cells)

7: ANTXR1 expression comparison - VSMCs

# Subset cell type of interest - VSMCs.
VSMCs <- subset(seurat_obj, 
                idents = c("VSMCs"), 
                   invert = FALSE)
VSMCs$treatment <- factor(VSMCs$treatment, 
                          levels = c("untreated", 
                                     "bleomycin administered intratracheally"))

# Make expression comparisons between metadata of interest.
source("~/Documents/Work/UnityHealth/Data_Projects/VlnPlot_stat.R")
VlnPlot_stat(object = VSMCs,
             gene_signature = c("Antxr1"),
             test_sign = list(c("Untreated", "Day 3"), 
                              c("Untreated", "Day 7"),
                              c("Untreated", "Day 14"),
                              c("Untreated", "Day 28"),
                              c("Untreated", "Day 42"),
                              c("Untreated", "Day 63")),
             group_name = "timepoint",
             title = "ANTXR1 pairwise comparisons between control and BLM-treated 
             VSMCs at different timepoints",
             x_angle = 0,
             hjust = 0.5,
             vjust = 1) 

# Remove unneeded objects.
rm(VSMCs)

8: DEG analysis

8.1: Set-up data for DEG

# Check if default ident of the Seurat object is cell type. 
all(Idents(seurat_obj) == seurat_obj$cell_type) # True. Default idents are cell types.
## [1] TRUE
# Create an aggregate variable with cell type and timepoint.
seurat_obj$deg_variable <- ifelse(seurat_obj$timepoint == "Untreated", 
                                  paste(seurat_obj$cell_type, 
                                        seurat_obj$timepoint, 
                                        sep = "_"),
                                  paste(seurat_obj$cell_type, 
                                        "BLM", 
                                        seurat_obj$timepoint, 
                                        sep = "_"))

# Aggregate counts based on cell_type, timepoint and accession ID (i.e. mice ID).
pseudo_seu <- AggregateExpression(seurat_obj,
                                  assays = "RNA",
                                  group.by = c("deg_variable", "orig.ident"),
                                  return.seurat = TRUE)
## Names of identity class contain underscores ('_'), replacing with dashes ('-')
## Centering and scaling data matrix
## 
## This message is displayed once every 8 hours.
# Use the new metadata column as the default ident. 
head(Cells(pseudo_seu))
## [1] "AT1-BLM-Day 14_day14BLM1.25mg.1" "AT1-BLM-Day 14_day14BLM1.25mg.2"
## [3] "AT1-BLM-Day 14_day14BLM1.25mg.3" "AT1-BLM-Day 14_day14BLM1.25mg.4"
## [5] "AT1-BLM-Day 14_day14BLM1.25mg.5" "AT1-BLM-Day 14_day14BLM3mg.1"
# Set deg_variable as the default ident.
Idents(pseudo_seu) <- "deg_variable"
# Connect to AnnotationHub.
ah <- AnnotationHub()

# Access the Ensembl database for organism.
ahDb <- query(ah, 
              pattern = c("Mus musculus", "EnsDb"), 
              ignore.case = TRUE)

# Acquire the latest annotation files.
id <- ahDb %>%
        mcols() %>%
        rownames() %>%
        tail(n = 1)

# Download the appropriate Ensembldb database.
edb <- ah[[id]]
## downloading 1 resources
## retrieving 1 resource
## loading from cache
# Extract gene-level information from database.
annotations <- genes(edb, 
                     return.type = "data.frame")

# Select annotations of interest.
annotations <- annotations %>%
        dplyr::select(gene_id, gene_name, seq_name, gene_biotype, description)

# Remove unneeded objects.
rm(ah, ahDb, edb, id)

8.2: DEG - Fibroblasts - BLM day 3 vs. Untreated

# Find differential expression genes between BLM day 3 and untreated.
# Positive logFC means ident.1 is upregulated relative to ident.2.
grep("Fibroblasts", Idents(pseudo_seu), value = TRUE) %>% unique()
## [1] "Fibroblasts-BLM-Day 14" "Fibroblasts-BLM-Day 28" "Fibroblasts-BLM-Day 3" 
## [4] "Fibroblasts-BLM-Day 42" "Fibroblasts-BLM-Day 63" "Fibroblasts-BLM-Day 7" 
## [7] "Fibroblasts-Untreated"
fib_blm3_unt <- FindMarkers(pseudo_seu, 
                            ident.1 = "Fibroblasts-BLM-Day 3",
                            ident.2 = "Fibroblasts-Untreated",
                            test.use = "DESeq2")
## Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
head(fib_blm3_unt, n = 15)
##                   p_val avg_log2FC pct.1 pct.2     p_val_adj
## Grip2      0.000000e+00  -1.872587     1     1  0.000000e+00
## Irf7       0.000000e+00   3.401018     1     1  0.000000e+00
## Lgals3bp   0.000000e+00   2.353194     1     1  0.000000e+00
## Serpina3n  0.000000e+00   3.037906     1     1  0.000000e+00
## Zbp1      4.402004e-308   2.446702     1     1 1.514598e-303
## Slc39a14  1.141460e-298   1.917295     1     1 3.927423e-294
## Cfb       1.156783e-288   3.044687     1     1 3.980144e-284
## Timp1     1.809557e-283   4.517787     1     1 6.226142e-279
## Col5a2    1.576654e-262   1.337088     1     1 5.424794e-258
## Ifi27l2a  3.775064e-245   2.819281     1     1 1.298886e-240
## Eif2ak2   1.802792e-244   1.162059     1     1 6.202867e-240
## Vcan      6.501954e-234   2.256318     1     1 2.237127e-229
## Gm20547   4.060094e-223   2.369464     1     1 1.396957e-218
## AI506816  1.876216e-211   1.403752     1     1 6.455497e-207
## Vldlr     8.307647e-200  -1.642992     1     1 2.858412e-195
# Filter out genes that are p.adjusted > 0.05.
dim(fib_blm3_unt)[1] # 32,170 hits before filtering.
## [1] 32199
fib_blm3_unt_sig <- fib_blm3_unt[which(fib_blm3_unt$p_val_adj < 0.05),]
dim(fib_blm3_unt_sig) # 3,327 after filtering for p-adjusted values.
## [1] 3366    5
# Add gene annotations to the DEG output.
fib_blm3_unt_sig <- fib_blm3_unt_sig %>% rownames_to_column("geneID")
fib_blm3_unt_sig <- dplyr::left_join(fib_blm3_unt_sig, annotations,
                                     by = c("geneID" = "gene_name"))

# DEG output.
volcano_input = fib_blm3_unt_sig
EnhancedVolcano(volcano_input, 
                volcano_input$geneID, 
                x = "avg_log2FC", 
                y = "p_val_adj",
                selectLab = c(volcano_input[c(volcano_input$p_val_adj < 1e-5 & 
                                                (volcano_input$avg_log2FC > 2.5 |
                                                   volcano_input$avg_log2FC < -2.5)),]$geneID,
                              "Antxr1"),
                title = "Fibroblasts BLM Day 3 vs. Fibroblasts Untreated",
                pointSize = 1.0,
                colAlpha = 0.8,
                drawConnectors = TRUE,
                legendPosition = "bottom",
                legendLabSize = 12.0,
                legendIconSize = 3.0)
## Warning: One or more p-values is 0. Converting to 10^-1 * current lowest
## non-zero p-value...

8.3: DEG - Fibroblasts - BLM day 7 vs. Untreated

# Find differential expression genes between BLM day 7 and untreated.
# Positive logFC means ident.1 is upregulated relative to ident.2.
grep("Fibroblasts", Idents(pseudo_seu), value = TRUE) %>% unique()
## [1] "Fibroblasts-BLM-Day 14" "Fibroblasts-BLM-Day 28" "Fibroblasts-BLM-Day 3" 
## [4] "Fibroblasts-BLM-Day 42" "Fibroblasts-BLM-Day 63" "Fibroblasts-BLM-Day 7" 
## [7] "Fibroblasts-Untreated"
fib_blm7_unt <- FindMarkers(pseudo_seu, 
                            ident.1 = "Fibroblasts-BLM-Day 7",
                            ident.2 = "Fibroblasts-Untreated",
                            test.use = "DESeq2")
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
head(fib_blm7_unt, n = 15)
##                       p_val avg_log2FC pct.1 pct.2     p_val_adj
## Grip2          0.000000e+00 -1.7804755     1     1  0.000000e+00
## Eln           1.609135e-277  2.6388889     1     1 5.536549e-273
## Serpina3n     2.316143e-252  3.0077009     1     1 7.969153e-248
## Sparc         1.937451e-242  1.6111741     1     1 6.666188e-238
## Igfbp3        3.932059e-195 -4.1856265     1     1 1.352904e-190
## Gm9844        6.082723e-149  0.9912654     1     1 2.092882e-144
## B230206H07Rik 1.496656e-141 -1.4297873     1     1 5.149546e-137
## Cygb          1.594341e-138  1.5267568     1     1 5.485648e-134
## Gm19220       3.251355e-138  0.4769499     1     1 1.118694e-133
## Ppic          2.071173e-134  1.3238213     1     1 7.126285e-130
## Fst           3.659802e-134  2.7298396     1     1 1.259228e-129
## Fbn1          1.179029e-130  1.0584070     1     1 4.056686e-126
## Gm15042       9.971023e-129 -1.3580344     1     1 3.430730e-124
## Vcan          2.016506e-127  1.5891587     1     1 6.938194e-123
## C1qtnf6       1.461743e-126  1.8409003     1     1 5.029418e-122
# Filter out genes that are p.adjusted > 0.05.
dim(fib_blm7_unt)[1] # 32,733 hits before filtering.
## [1] 32775
fib_blm7_unt_sig <- fib_blm7_unt[which(fib_blm7_unt$p_val_adj < 0.05),]
dim(fib_blm7_unt_sig)[1] # 3,956 after filtering for p-adjusted values.
## [1] 4028
# Add gene annotations to the DEG output.
fib_blm7_unt_sig <- fib_blm7_unt_sig %>% rownames_to_column("geneID")
fib_blm7_unt_sig <- dplyr::left_join(fib_blm7_unt_sig, annotations,
                                     by = c("geneID" = "gene_name"))

# DEG output.
volcano_input = fib_blm7_unt_sig
EnhancedVolcano(volcano_input, 
                volcano_input$geneID, 
                x = "avg_log2FC", 
                y = "p_val_adj",
                selectLab = c(volcano_input[c(volcano_input$p_val_adj < 1e-5 & 
                                                (volcano_input$avg_log2FC > 2.5 |
                                                   volcano_input$avg_log2FC < -2.5)),]$geneID,
                              "Antxr1"),
                title = "Fibroblasts BLM Day 7 vs. Fibroblasts Untreated",
                pointSize = 1.0,
                colAlpha = 0.8,
                drawConnectors = TRUE,
                legendPosition = "bottom",
                legendLabSize = 12.0,
                legendIconSize = 3.0)
## Warning: One or more p-values is 0. Converting to 10^-1 * current lowest
## non-zero p-value...

8.4: DEG - Fibroblasts - BLM day 14 vs. Untreated

# Find differential expression genes between BLM day 14 and untreated.
# Positive logFC means ident.1 is upregulated relative to ident.2.
grep("Fibroblasts", Idents(pseudo_seu), value = TRUE) %>% unique()
## [1] "Fibroblasts-BLM-Day 14" "Fibroblasts-BLM-Day 28" "Fibroblasts-BLM-Day 3" 
## [4] "Fibroblasts-BLM-Day 42" "Fibroblasts-BLM-Day 63" "Fibroblasts-BLM-Day 7" 
## [7] "Fibroblasts-Untreated"
fib_blm14_unt <- FindMarkers(pseudo_seu, 
                             ident.1 = "Fibroblasts-BLM-Day 14",
                             ident.2 = "Fibroblasts-Untreated",
                             test.use = "DESeq2")
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
head(fib_blm14_unt, n = 15)
##                 p_val avg_log2FC pct.1 pct.2     p_val_adj
## Grip2    0.000000e+00  -1.570983     1     1  0.000000e+00
## Ppic    6.734934e-295   1.793365     1     1 2.317289e-290
## Sparc   9.156804e-295   1.865336     1     1 3.150581e-290
## Ltbp4   2.034565e-284  -1.800311     1     1 7.000327e-280
## Ltbp2   3.973542e-273   3.214244     1     1 1.367177e-268
## Eln     7.764271e-266   2.789974     1     1 2.671453e-261
## Gm13341 1.399028e-259   1.219839     1     1 4.813635e-255
## Mmp14   3.940878e-245   1.733639     1     1 1.355938e-240
## Gm9844  1.139528e-213   1.216244     1     1 3.920773e-209
## Lpl     3.359516e-172   2.445588     1     1 1.155909e-167
## Sfrp1   7.287726e-169   3.122706     1     1 2.507488e-164
## Gstm1   1.640072e-164  -2.203675     1     1 5.642996e-160
## Igfbp3  1.447322e-163  -3.261039     1     1 4.979800e-159
## Gsn     1.304212e-160  -2.195047     1     1 4.487403e-156
## Rps26   8.451137e-160   1.081180     1     1 2.907783e-155
# Filter out genes that are p.adjusted > 0.05.
dim(fib_blm14_unt)[1] # 32,083 hits before filtering.
## [1] 32107
fib_blm14_unt_sig <- fib_blm14_unt[which(fib_blm14_unt$p_val_adj < 0.05),]
dim(fib_blm14_unt_sig)[1] # 3,235 after filtering for p-adjusted values.
## [1] 3240
# Add gene annotations to the DEG output.
fib_blm14_unt_sig <- fib_blm14_unt_sig %>% rownames_to_column("geneID")
fib_blm14_unt_sig <- dplyr::left_join(fib_blm14_unt_sig, annotations,
                                      by = c("geneID" = "gene_name"))

# DEG output.
volcano_input = fib_blm14_unt_sig
EnhancedVolcano(volcano_input, 
                volcano_input$geneID, 
                x = "avg_log2FC", 
                y = "p_val_adj",
                selectLab = c(volcano_input[c(volcano_input$p_val_adj < 1e-5 & 
                                                (volcano_input$avg_log2FC > 2.5 |
                                                   volcano_input$avg_log2FC < -2.5)),]$geneID,
                              "Antxr1"),
                title = "Fibroblasts BLM Day 14 vs. Fibroblasts Untreated",
                pointSize = 1.0,
                colAlpha = 0.8,
                drawConnectors = TRUE,
                legendPosition = "bottom",
                legendLabSize = 12.0,
                legendIconSize = 3.0)
## Warning: One or more p-values is 0. Converting to 10^-1 * current lowest
## non-zero p-value...

8.5: DEG - Fibroblasts - BLM day 28 vs. Untreated

# Find differential expression genes between BLM day 28 and untreated.
# Positive logFC means ident.1 is upregulated relative to ident.2.
grep("Fibroblasts", Idents(pseudo_seu), value = TRUE) %>% unique()
## [1] "Fibroblasts-BLM-Day 14" "Fibroblasts-BLM-Day 28" "Fibroblasts-BLM-Day 3" 
## [4] "Fibroblasts-BLM-Day 42" "Fibroblasts-BLM-Day 63" "Fibroblasts-BLM-Day 7" 
## [7] "Fibroblasts-Untreated"
fib_blm28_unt <- FindMarkers(pseudo_seu, 
                             ident.1 = "Fibroblasts-BLM-Day 28",
                             ident.2 = "Fibroblasts-Untreated",
                             test.use = "DESeq2")
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
head(fib_blm28_unt, n = 15)
##               p_val avg_log2FC pct.1 pct.2     p_val_adj
## Grip2 1.035914e-300 -0.9331956     1     1 3.564271e-296
## Eln   4.518838e-256  2.6428744     1     1 1.554797e-251
## Ppic  3.628170e-198  1.6585257     1     1 1.248344e-193
## Mfap5 1.756821e-161  1.4454413     1     1 6.044693e-157
## Hspg2 3.622938e-159  1.3889420     1     1 1.246544e-154
## Ltbp2 2.534226e-158  3.6505420     1     1 8.719510e-154
## Ppl   1.165267e-142 -1.3424678     1     1 4.009334e-138
## Mmp2  5.800168e-137  1.3555032     1     1 1.995664e-132
## Cst3  3.618902e-131  0.8260906     1     1 1.245156e-126
## Lpl   6.041385e-130  2.5098725     1     1 2.078659e-125
## Neat1 2.669330e-126 -1.2733201     1     1 9.184364e-122
## Pdzd2 2.026379e-120 -1.7855659     1     1 6.972161e-116
## Foxp4 4.147150e-118  0.8367008     1     1 1.426910e-113
## Gdnf  9.622469e-116  1.1668084     1     1 3.310803e-111
## P4ha3 4.383610e-114  1.4927192     1     1 1.508269e-109
# Filter out genes that are p.adjusted > 0.05.
dim(fib_blm28_unt)[1] # 32,675 hits before filtering.
## [1] 32690
fib_blm28_unt_sig <- fib_blm28_unt[which(fib_blm28_unt$p_val_adj < 0.05),]
dim(fib_blm28_unt_sig)[1] # 2,810 after filtering for p-adjusted values.
## [1] 2801
# Add gene annotations to the DEG output.
fib_blm28_unt_sig <- fib_blm28_unt_sig %>% rownames_to_column("geneID")
fib_blm28_unt_sig <- dplyr::left_join(fib_blm28_unt_sig, annotations,
                                      by = c("geneID" = "gene_name"))

# DEG output.
volcano_input = fib_blm28_unt_sig
EnhancedVolcano(volcano_input, 
                volcano_input$geneID, 
                x = "avg_log2FC", 
                y = "p_val_adj",
                selectLab = c(volcano_input[c(volcano_input$p_val_adj < 1e-5 & 
                                                (volcano_input$avg_log2FC > 2 |
                                                   volcano_input$avg_log2FC < -2)),]$geneID,
                              "Antxr1"),
                title = "Fibroblasts BLM Day 28 vs. Fibroblasts Untreated",
                pointSize = 1.0,
                colAlpha = 0.8,
                drawConnectors = TRUE,
                legendPosition = "bottom",
                legendLabSize = 12.0,
                legendIconSize = 3.0)

8.6: DEG - Fibroblasts - BLM day 42 vs. Untreated

# Find differential expression genes between BLM day 42 and untreated.
# Positive logFC means ident.1 is upregulated relative to ident.2.
grep("Fibroblasts", Idents(pseudo_seu), value = TRUE) %>% unique()
## [1] "Fibroblasts-BLM-Day 14" "Fibroblasts-BLM-Day 28" "Fibroblasts-BLM-Day 3" 
## [4] "Fibroblasts-BLM-Day 42" "Fibroblasts-BLM-Day 63" "Fibroblasts-BLM-Day 7" 
## [7] "Fibroblasts-Untreated"
fib_blm42_unt <- FindMarkers(pseudo_seu, 
                             ident.1 = "Fibroblasts-BLM-Day 42",
                             ident.2 = "Fibroblasts-Untreated",
                             test.use = "DESeq2")
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
head(fib_blm42_unt, n = 15)
##                       p_val avg_log2FC pct.1 pct.2     p_val_adj
## Grip2          0.000000e+00 -1.0686665     1     1  0.000000e+00
## Hspg2         8.367822e-179  1.3835076     1     1 2.879117e-174
## Gas6          2.196524e-142  0.8988378     1     1 7.557579e-138
## Enpep         2.519354e-133 -0.9611424     1     1 8.668342e-129
## Nbl1          1.242947e-132  0.8336097     1     1 4.276606e-128
## Serpinf1      1.702681e-128  0.9298654     1     1 5.858414e-124
## Gm48159       1.401517e-123 -0.7099277     1     1 4.822200e-119
## B230206H07Rik 1.159610e-122 -0.6573852     1     1 3.989872e-118
## Nfix          1.699726e-117  0.4480881     1     1 5.848247e-113
## Emc10         1.660963e-113  0.6973004     1     1 5.714875e-109
## Gm14964       1.098970e-100 -1.4226445     1     1  3.781227e-96
## CT010467.1     1.713452e-99  0.8018592     1     1  5.895475e-95
## Mmp14          4.519884e-99  1.5147570     1     1  1.555157e-94
## Vegfa          5.102301e-99 -1.6444721     1     1  1.755549e-94
## Mmp2           7.980692e-99  1.0265583     1     1  2.745917e-94
# Filter out genes that are p.adjusted > 0.05.
dim(fib_blm42_unt)[1] # 32,217 hits before filtering.
## [1] 32233
fib_blm42_unt_sig <- fib_blm42_unt[which(fib_blm42_unt$p_val_adj < 0.05),]
dim(fib_blm42_unt_sig)[1] # 2,500 after filtering for p-adjusted values.
## [1] 2471
# Add gene annotations to the DEG output.
fib_blm42_unt_sig <- fib_blm42_unt_sig %>% rownames_to_column("geneID")
fib_blm42_unt_sig <- dplyr::left_join(fib_blm42_unt_sig, annotations,
                                      by = c("geneID" = "gene_name"))

# DEG output.
volcano_input = fib_blm42_unt_sig
EnhancedVolcano(volcano_input, 
                volcano_input$geneID, 
                x = "avg_log2FC", 
                y = "p_val_adj",
                selectLab = c(volcano_input[c(volcano_input$p_val_adj < 1e-5 & 
                                                (volcano_input$avg_log2FC > 1.5 |
                                                   volcano_input$avg_log2FC < -1.5)),]$geneID,
                              "Antxr1"),
                title = "Fibroblasts BLM Day 42 vs. Fibroblasts Untreated",
                pointSize = 1.0,
                colAlpha = 0.8,
                drawConnectors = TRUE,
                legendPosition = "bottom",
                legendLabSize = 12.0,
                legendIconSize = 3.0)
## Warning: One or more p-values is 0. Converting to 10^-1 * current lowest
## non-zero p-value...

8.7: DEG - Fibroblasts - BLM day 63 vs. Untreated

# Find differential expression genes between BLM day 63 and untreated.
# Positive logFC means ident.1 is upregulated relative to ident.2.
grep("Fibroblasts", Idents(pseudo_seu), value = TRUE) %>% unique()
## [1] "Fibroblasts-BLM-Day 14" "Fibroblasts-BLM-Day 28" "Fibroblasts-BLM-Day 3" 
## [4] "Fibroblasts-BLM-Day 42" "Fibroblasts-BLM-Day 63" "Fibroblasts-BLM-Day 7" 
## [7] "Fibroblasts-Untreated"
fib_blm63_unt <- FindMarkers(pseudo_seu, 
                             ident.1 = "Fibroblasts-BLM-Day 63",
                             ident.2 = "Fibroblasts-Untreated",
                             test.use = "DESeq2")
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
head(fib_blm63_unt, n = 15)
##                       p_val avg_log2FC pct.1 pct.2     p_val_adj
## Grip2          0.000000e+00 -0.9567409     1     1  0.000000e+00
## Tpt1-ps3       0.000000e+00 -1.0988201     1     1  0.000000e+00
## Pdzd2         8.418776e-308 -0.9692257     1     1 2.896648e-303
## Pcolce2       7.903940e-239 -0.7078980     1     1 2.719509e-234
## Slc7a10       1.726133e-184 -1.2471717     1     1 5.939105e-180
## Vegfa         5.263160e-172 -1.1186799     1     1 1.810895e-167
## Dipk2a        1.379317e-154 -0.8997302     1     1 4.745817e-150
## Mmp14         8.699030e-149  1.0691436     1     1 2.993075e-144
## Tpt1          3.016799e-143 -0.5824267     1     1 1.037990e-138
## Gas6          4.192713e-143  0.8717162     1     1 1.442587e-138
## Sfrp1         3.465257e-131  1.5966637     1     1 1.192291e-126
## Prkg2         7.026898e-109 -0.7699534     1     1 2.417745e-104
## Scn3a         8.488772e-104 -0.8645322     1     1  2.920732e-99
## Cdo1          1.460957e-101 -0.6555618     1     1  5.026716e-97
## B230206H07Rik  2.831565e-95 -0.5427043     1     1  9.742565e-91
# Filter out genes that are p.adjusted > 0.05.
dim(fib_blm63_unt)[1] # 32,217 hits before filtering.
## [1] 31834
fib_blm63_unt_sig <- fib_blm63_unt[which(fib_blm63_unt$p_val_adj < 0.05),]
dim(fib_blm63_unt_sig)[1] # 2,500 after filtering for p-adjusted values.
## [1] 1662
# Add gene annotations to the DEG output.
fib_blm63_unt_sig <- fib_blm63_unt_sig %>% rownames_to_column("geneID")
fib_blm63_unt_sig <- dplyr::left_join(fib_blm63_unt_sig, annotations,
                                      by = c("geneID" = "gene_name"))

# DEG output.
volcano_input = fib_blm63_unt_sig
EnhancedVolcano(volcano_input, 
                volcano_input$geneID, 
                x = "avg_log2FC", 
                y = "p_val_adj",
                selectLab = c(volcano_input[c(volcano_input$p_val_adj < 1e-5 & 
                                                (volcano_input$avg_log2FC > 1 |
                                                   volcano_input$avg_log2FC < -1)),]$geneID,
                              "Antxr1"),
                title = "Fibroblasts BLM Day 63 vs. Fibroblasts Untreated",
                pointSize = 1.0,
                colAlpha = 0.8,
                drawConnectors = TRUE,
                legendPosition = "bottom",
                legendLabSize = 12.0,
                legendIconSize = 3.0)
## Warning: One or more p-values is 0. Converting to 10^-1 * current lowest
## non-zero p-value...

8.8: UpSet Plot - DEG Overlap between Comparisons

# Combine DEG results as a list.
deg_list = list(BLM3days_vs_UT = fib_blm3_unt_sig$geneID,
                BLM7days_vs_UT = fib_blm7_unt_sig$geneID,
                BLM14days_vs_UT = fib_blm14_unt_sig$geneID,
                BLM28days_vs_UT = fib_blm28_unt_sig$geneID,
                BLM42days_vs_UT = fib_blm42_unt_sig$geneID,
                BLM63days_vs_UT = fib_blm63_unt_sig$geneID)
head(list_to_matrix(deg_list))
##               BLM3days_vs_UT BLM7days_vs_UT BLM14days_vs_UT BLM28days_vs_UT
## 0610009B22Rik              0              1               0               0
## 1110008P14Rik              1              1               1               1
## 1110018N20Rik              0              0               0               1
## 1110032A03Rik              1              0               0               0
## 1110038B12Rik              1              1               1               1
## 1110038F14Rik              0              0               0               1
##               BLM42days_vs_UT BLM63days_vs_UT
## 0610009B22Rik               0               0
## 1110008P14Rik               1               0
## 1110018N20Rik               0               0
## 1110032A03Rik               0               0
## 1110038B12Rik               0               0
## 1110038F14Rik               0               0
# Make the combination matrix.
m = make_comb_mat(deg_list)
m[1:4]
## A combination matrix with 6 sets and 4 combinations.
##   ranges of combination set size: c(49, 423).
##   mode for the combination size: distinct.
##   sets are on rows.
## 
## Combination sets are:
##   BLM3days_vs_UT BLM7days_vs_UT BLM14days_vs_UT BLM28days_vs_UT BLM42days_vs_UT BLM63days_vs_UT   code size
##                x              x               x               x               x               x 111111  423
##                x              x               x               x               x                 111110  294
##                x              x               x               x                               x 111101   49
##                x              x               x                               x               x 111011   53
## 
## Sets are:
##               set size
##    BLM3days_vs_UT 3366
##    BLM7days_vs_UT 4028
##   BLM14days_vs_UT 3240
##   BLM28days_vs_UT 2801
##   BLM42days_vs_UT 2471
##   BLM63days_vs_UT 1662
# Plot the interaction in an UpSet plot.
UpSet(m, set_order = c("BLM3days_vs_UT", 
                       "BLM7days_vs_UT", 
                       "BLM14days_vs_UT", 
                       "BLM28days_vs_UT",
                       "BLM42days_vs_UT",
                       "BLM63days_vs_UT"))

References

Analysis codes are adapted from HBCtraining and Ouyang Lab with credits going to the original authors of the publications cited in the book.